Loading the necessary libraries:
library(tidyverse)
## -- Attaching packages -------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ----------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(mapdata)
## Loading required package: maps
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(maps)
library(viridis)
## Loading required package: viridisLite
library(wesanderson)
World COVID-19 report from 09/26/2020.
# Download the CSSE COVID-19 daily report for 09/26/2020 and expose the
# longitude column under the name `Long` instead of `Long_`.
daily_report <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv" %>%
  url() %>%
  read_csv() %>%
  rename(Long = "Long_")
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
Summarize the counts for each country and obtain median latitude and longitude from daily_report dataset:
# Per-country summary of the daily report: one row per Country_Region with the
# median latitude/longitude of its entries and the total number of deaths.
# Rows without coordinates are dropped first so the medians are well defined.
# Missing values are tested with is.na(); comparing a numeric column against
# the string "NA" only works by accident of NA propagation inside filter().
deaths_report <- daily_report %>%
  filter(!is.na(Lat), !is.na(Long)) %>%
  group_by(Country_Region) %>%
  summarise(
    Lat = median(Lat),
    Long = median(Long),
    Total_Deaths = sum(Deaths),
    .groups = "drop"
  )
## `summarise()` ungrouping output (override with `.groups` argument)
World map of COVID-19 deaths reported from 09/26/2020
# Static bubble map: one semi-transparent purple point per country, sized by
# total deaths (in thousands), drawn on top of a grey world outline.
ggplot(
  data = deaths_report,
  mapping = aes(x = Long, y = Lat, size = Total_Deaths / 1000)
) +
  borders("world", colour = NA, fill = "grey90") +
  theme_bw() +
  geom_point(shape = 21, colour = "purple", fill = "purple", alpha = 0.5) +
  labs(
    title = "World COVID-19 Death Report",
    x = "",
    y = "",
    size = "Deaths (x1000)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.5)
Interactive world map of deaths reported from 09/26/2020:
# Interactive version of the world deaths map: the same ggplot, with the
# country name mapped to `text`, handed to plotly for rendering.
(
  ggplot(
    data = deaths_report,
    mapping = aes(
      x = Long,
      y = Lat,
      text = Country_Region,
      size = Total_Deaths / 1000
    )
  ) +
    borders("world", colour = NA, fill = "grey90") +
    theme_bw() +
    geom_point(shape = 21, colour = "purple", fill = "purple", alpha = 0.5) +
    labs(
      title = "World COVID-19 Death Report",
      x = "",
      y = "",
      size = "Deaths (x1000)"
    ) +
    theme(legend.position = "right") +
    coord_fixed(ratio = 1.5)
) %>%
  ggplotly()
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
Update to Anisa Dhana’s graph layout of the US based on 09/26/2020 data:
# Keep only US rows, excluding the listed states, territories and
# non-geographic entries, and require a positive latitude (rows with a
# missing latitude are dropped by the same condition).
US_daily_report <- daily_report %>%
  filter(
    Country_Region == "US",
    !Province_State %in% c(
      "Alaska", "Hawaii", "American Samoa",
      "Puerto Rico", "Northern Mariana Islands",
      "Virgin Islands", "Recovered", "Guam", "Grand Princess",
      "District of Columbia", "Diamond Princess"
    ),
    Lat > 0
  )
# Legend breaks for the case-count scales. Each break is the lower bound of the
# range named by the label at the same position, so breaks and labels must have
# the same length and every break must be distinct.
mybreaks <- c(1, 5000, 10000, 50000, 100000)
case_labels <- c(
  "1-4,999", "5,000-9,999", "10,000-49,999", "50,000-99,999", "100,000+"
)

# Bubble map of confirmed cases per US location: point size and colour both
# encode Confirmed on a log scale and share one legend.
# NOTE(review): rows with zero confirmed cases become -Inf under the log
# transform, which is presumably why ggplot warns about infinite values and
# removed rows when this is rendered.
ggplot(US_daily_report, aes(x = Long, y = Lat, size = Confirmed)) +
  borders("state", colour = "white", fill = "grey90") +
  # x, y and size are inherited from the plot-level mapping.
  geom_point(aes(color = Confirmed), stroke = FALSE, alpha = 0.4) +
  scale_size_continuous(
    name = "Cases", range = c(1, 7), trans = "log",
    breaks = mybreaks, labels = case_labels
  ) +
  scale_color_viridis_c(
    option = "viridis", name = "Cases", trans = "log",
    breaks = mybreaks, labels = case_labels
  ) +
  # Cleaning up the graph
  theme_void() +
  # Draw colour as a discrete legend so it merges with the size legend.
  guides(colour = guide_legend()) +
  labs(title = "Anisa Dhana's layout for COVID-19 Confirmed Cases in the US") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  coord_fixed(ratio = 1.5)
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 6 rows containing missing values (geom_point).
Number of Confirmed Cases by US County:
library(RColorBrewer)
# To display only colorblind-friendly brewer palettes, specify the option colorblindFriendly = TRUE as follow:
#display.brewer.all(colorblindFriendly = TRUE)
# Aggregate confirmed cases per county: build a "county.state" key from Admin2
# and Province_State, total Confirmed per key, then lower-case the key so it
# can be matched against the map data.
US_Sept26_report <- US_daily_report %>%
  unite(col = Key, Admin2, Province_State, sep = ".") %>%
  group_by(Key) %>%
  summarise(Confirmed = sum(Confirmed)) %>%
  mutate(across(Key, tolower))
## `summarise()` ungrouping output (override with `.groups` argument)
# Polygon data for the state outlines.
us <- map_data("state")
# County polygons, with a "subregion.region" key added alongside the original
# columns (remove = FALSE keeps subregion and region).
counties <- unite(
  map_data("county"),
  Key, subregion, region,
  sep = ".", remove = FALSE
)
# Attach the confirmed counts to every county polygon vertex; counties with no
# matching key keep Confirmed = NA.
state_join <- counties %>%
  left_join(US_Sept26_report, by = "Key")
# Optional check of how many polygon rows found no case count:
# sum(is.na(state_join$Confirmed))
# County choropleth: fill encodes Confirmed on a log10 scale with the GnBu
# palette; counties without data are drawn white.
ggplot(
  data = us,
  mapping = aes(x = long, y = lat, group = group)
) +
  coord_fixed(ratio = 1.3) +
  # State outlines first, county fill on top
  borders("state", colour = "black") +
  geom_polygon(data = state_join, mapping = aes(fill = Confirmed)) +
  scale_fill_gradientn(
    colours = brewer.pal(n = 5, name = "GnBu"),
    breaks = c(1, 10, 100, 1000, 10000, 100000),
    trans = "log10",
    na.value = "White"
  ) +
  ggtitle("Number of Confirmed Cases by US County") +
  theme_bw()
## Warning: Transformation introduced infinite values in discrete y-axis
Confirmed Cases by County in California:
# Re-download the 09/26/2020 daily report and reduce it to California:
# total confirmed cases per county (Admin2), with county names lower-cased.
# Note that this replaces the previous contents of `daily_report`.
daily_report <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv" %>%
  url() %>%
  read_csv() %>%
  filter(Province_State == "California") %>%
  group_by(Admin2) %>%
  summarise(Confirmed = sum(Confirmed)) %>%
  mutate(across(Admin2, tolower))
## Parsed with column specification:
## cols(
## FIPS = col_double(),
## Admin2 = col_character(),
## Province_State = col_character(),
## Country_Region = col_character(),
## Last_Update = col_datetime(format = ""),
## Lat = col_double(),
## Long_ = col_double(),
## Confirmed = col_double(),
## Deaths = col_double(),
## Recovered = col_double(),
## Active = col_double(),
## Combined_Key = col_character(),
## Incidence_Rate = col_double(),
## `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# State and county polygon data, each narrowed to California.
us <- map_data("state")
ca_us <- us %>% subset(region == "california")
counties <- map_data("county")
ca_county <- counties %>% subset(region == "california")
# Attach confirmed counts to the county polygons by matching the map's
# `subregion` against the report's `Admin2`.
state_join <- ca_county %>%
  left_join(daily_report, by = c("subregion" = "Admin2"))
# California county choropleth: fill encodes Confirmed on a log10 scale with
# the BuPu palette; counties are outlined in white.
ggplot(
  data = ca_county,
  mapping = aes(x = long, y = lat, group = group)
) +
  coord_fixed(ratio = 1.3) +
  # Add data layer
  geom_polygon(
    data = state_join,
    mapping = aes(fill = Confirmed),
    colour = "white"
  ) +
  scale_fill_gradientn(
    colours = brewer.pal(n = 5, name = "BuPu"),
    trans = "log10"
  ) +
  theme_bw() +
  labs(title = "COVID-19 Confirmed Cases in California")
Interactive Plot of Confirmed Cases in California:
# Interactive California county map: the choropleth is built with ggplot
# (continuous Moonrise3 palette, black county outlines, no axes or panel
# decoration) and then converted with plotly.
(
  ggplot(
    data = ca_county,
    mapping = aes(x = long, y = lat, group = group)
  ) +
    coord_fixed(ratio = 1.3) +
    # Add data layer
    geom_polygon(
      data = state_join,
      mapping = aes(fill = Confirmed),
      colour = "black"
    ) +
    scale_fill_gradientn(
      colours = wes_palette(name = "Moonrise3", n = 100, type = "continuous")
    ) +
    ggtitle("COVID-19 Cases in CA") +
    # Cleaning up the graph
    labs(x = NULL, y = NULL) +
    theme(
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.ticks = element_blank(),
      axis.text = element_blank()
    )
) %>%
  ggplotly()
This report uses COVID-19 data available from (Dong, Du, and Gardner 2020). Data analysis is performed in R (R Core Team 2015) using the Tidyverse (Wickham et al. 2019), specifically ggplot2 for graphs (Wickham 2016) and plotly (Sievert 2020) for interactive graphs.
An interactive map of COVID-19 confirmed cases in the US as of 09/26/2020:
California, Texas, and Florida have the highest number of confirmed cases to date.
It would be informative to view county cases for these states with high confirmed cases. In the case of California, southern and central California seem to be most affected.
For Texas, the cases seem to be much more spread across the state, with a high number of cases in the south and east of the state.
## Warning: Transformation introduced infinite values in discrete y-axis
In the case of Florida, counties along the border, especially in the southern region, have the greatest number of confirmed cases.
Dong, E., H. Du, and L. Gardner. 2020. “An Interactive Web-Based Dashboard to Track Covid-19 in Real Time.” Journal Publication. https://doi.org/10.1016/S1473-3099(20)30120-1.
R Core Team. 2015. “R: A Language and Environment for Statistical Computing.” Journal Article. http://www.R-project.org.
Sievert, Carson. 2020. Interactive Web-Based Data Visualization with R, Plotly, and Shiny. https://plotly-r.com.
Wickham, Hadley. 2016. Ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. https://ggplot2.tidyverse.org.
Wickham, Hadley, Mara Averick, Jennifer Bryan, Winston Chang, Lucy D’Agostino McGowan, Romain François, Garrett Grolemund, et al. 2019. “Welcome to the tidyverse.” Journal of Open Source Software 4 (43): 1686.